* Session setup: start from an empty dataset and a clean logging state.
* Remove all variables (and therefore all observations) currently in memory.
drop _all
* NOTE(review): legacy form of -set more-; presumably intended to stop output
* from pausing at --more-- prompts. Confirm on the Stata version in use.
set more 1
* Set the matsize limit to 800.
set matsize 800
* Close any open log file; -capture- suppresses the error when none is open.
capture log close


*LOAD SI FIRM DATA
* A forward slash is used as the separator: it is accepted on Windows, macOS
* and Linux, and a doubled separator is harmless if ${mastersi} already ends
* with one (the other paths in this file append file names directly).
use "${mastersi}/si_panel.dta", clear

*PREPARE TO MERGE WITH TEMPERATURE DATA FROM NASA POWER
* NOTE(review): kabucode_1990 is converted to numeric here but the merge key
* below is kabucode; confirm that kabucode itself is already numeric.
destring kabucode_1990, replace

* Attach yearly temperature by district (kabucode) and year. Only plant-years
* with a weather match are kept; the merge indicator is not stored.
merge m:1 kabucode year using "${masterweather}temp_yearly.dta", keep(match) nogenerate

*DEFLATE VALUE ADDED
* Attach the GDP deflator by year. Deflator years that have no plant
* observations are not appended to the panel; plant-years without a deflator
* stay in memory with gdpdef missing and are removed by the drop below.
merge m:1 year using "${otherdata}gdpdeflator.dta", keep(master match) nogenerate
* gdpdef is divided by 100 before deflating value added.
replace value_added = value_added/(gdpdef/100)

*Drop obs with either wages or value_added missing
* (this also removes observations whose deflated value added is missing
* because no deflator was available for that year)
drop if wages ==. | value_added ==.


*A. REMOVE IMPLAUSIBLY LARGE SPIKES
* For each (growth variable, level variable, cutoff) triple, compute growth
* relative to the plant's previous observation in year order, then drop the
* observations whose growth is at or above the cutoff. Observations with
* missing growth (e.g. a plant's first record) are retained.
* The employment step runs after the value-added step, so its "previous
* observation" is taken from the sample that survived the first drop.
foreach spec in "gr_va value_added 20" "gr_em num_workers 10" {
	tokenize `spec'
	local growth `1'
	local level  `2'
	local cutoff `3'

	bysort PSID (year): generate `growth' = (`level' - `level'[_n-1]) / `level'[_n-1]
	drop if `growth' >= `cutoff' & `growth' != .
}


*B. REMOVE OBSERVATIONS WITH MISREPORTING
* A plant-year reporting zero workers or zero wages together with a nonzero,
* nonmissing value added, output, or eltrc_PLN is treated as inconsistent.
foreach activity of varlist value_added output eltrc_PLN {
	drop if (num_workers == 0 | wages == 0) & `activity' != 0 & `activity' != .
}

*C. Remove observations with unrealistically high or low labor intensity
* Only ratios strictly between 0.005 and 2 are retained. A missing ratio
* (value_added equal to zero) fails the upper-bound test and is removed too.
generate firmlabint = wages/value_added
keep if firmlabint > 0.005 & firmlabint < 2

*D. REMOVE OBSERVATIONS WITH NEGATIVE CAPITAL
* Missing cap_est is not affected by this condition.
drop if cap_est < 0

*F1. CLEAN UP THE START YEARS
* Earliest year in which each plant is observed, computed before the sample
* is restricted to 2001 onward.
bysort PSID: egen birthinsample = min(year)

* Entry year equals the first observed year, left missing when that year is
* 1983 (presumably the first year of the data, so true entry is unknown;
* TODO confirm).
generate entryyear = birthinsample if birthinsample != 1983

* Restrict the panel to 2001 and later
drop if year < 2001

* Retain only plants whose first remaining observation is in 2001
bysort PSID: egen firstyearinsample_2001 = min(year)
keep if firstyearinsample_2001 == 2001

*FIX EXPORT VALUES
* Step 1: shares above 1 and up to 100 are divided by 100.
replace expShare = expShare / 100 if expShare > 1 & expShare <= 100

* Step 2: any nonmissing share still above 1 (i.e. originally above 100)
* is divided by 1000.
replace expShare = expShare / 1000 if expShare > 1 & expShare < .

* Recode exporter to a 0/1 indicator:
*   below 0.75, or 1.5 and above (including 2)  -> 0
*   strictly between 0.75 and 1.5               -> 1
* Missing values and values exactly equal to 0.75 are left unchanged.
replace exporter = 0 if exporter < 0.75 | (exporter >= 1.5 & exporter < .)
replace exporter = 1 if exporter > 0.75 & exporter < 1.5

* Non-exporters have a zero export share
replace expShare = 0 if exporter == 0

	
* Declare the panel structure (plant identifier PSID, time variable year)
xtset PSID year

* exit = 1 on a plant's last observed year and 0 on its earlier years.
* Observations in 2012 get a missing value instead, so being last observed
* in 2012 is not counted as an exit.
* NOTE(review): gaps inside a plant's history are not treated as exits here;
* only the final observation is flagged.
bysort PSID (year): generate exit = (_n == _N) if year != 2012

* Put identifiers and sample-timing variables first, then store the panel
order PSID year exit birthinsample entryyear firstyearinsample_2001
sort PSID year

save "${mastersi}si_panel_exit0112.dta", replace


***********************************************************************
***********************************************************************
*Productivity estimates (prodest, method wrdg), estimated separately for
*each 2-digit industry, then averaged per plant and cut into terciles.

use "${mastersi}si_panel_exit0112.dta", clear

*using prodest package
* NOTE(review): prodest and the egen xtile() function used below are not
* built-in commands; presumably prodest and egenmore must be installed.

xtset PSID year

* 2-digit industry code: prod3 with its last digit dropped
gen prod2 = floor(prod3/10)

drop prod3

su value_added wages raw_mat cap_est expenses_elect

* Log-transform the production-function variables. Non-positive values
* become missing.
foreach var of varlist value_added wages raw_mat cap_est expenses_elect {

	gen log_`var' = log(`var')

}


* Container for the industry-specific residuals
gen tfp_wrdg_raw1 = .


forval i = 31/39 {

	* Value-added production function for industry `i' only
	prodest log_value_added if prod2 == `i', free(log_wages) state(log_cap_est) proxy(log_raw_mat) va met(wrdg) poly(2) id(PSID) t(year)

	predict tfp_wrdg_raw, residuals

	* Store the residuals only for plants in industry `i'. Filling every
	* still-missing observation instead would let residuals computed from
	* one industry's coefficients be assigned to plants in other industries
	* whenever predict returns values outside the estimation sample.
	replace tfp_wrdg_raw1 = tfp_wrdg_raw if prod2 == `i'

	drop tfp_wrdg_raw
}


keep PSID tfp_wrdg_raw1 year prod2

* One row per plant, one residual column per year.
* reshape requires prod2 to be constant within PSID.
reshape wide tfp_wrdg_raw1, i(PSID) j(year)

* Plant-level productivity: mean of the nonmissing yearly residuals
egen pdty_wrdg = rowmean(tfp_wrdg*)

* Terciles of plant-level productivity within each 2-digit industry
egen BinPdty3_initialprod2_wrdg = xtile(pdty_wrdg), by(prod2) nq(3)

* Use the full variable name so the label does not depend on variable
* abbreviation being enabled.
label var BinPdty3_initialprod2_wrdg "prod2 xtile(3) based on pdty_wrdg"

drop tfp*

sort PSID

save "${interpdty}pdtybin_wrdg_exit0112.dta", replace


***********************************************************************
***********************************************************************
*Construct initial productivity ranks
clear
use "${mastersi}si_panel_exit0112.dta"

* Labor productivity: value added per worker
gen va_pwk = value_added/num_workers

*********************************************************************
*Generate firm-specific time-invariant productivity indicators
keep PSID year va_pwk prod3 exporter

bysort PSID: egen meanprod3 = mean(prod3)
*Drop the two plants that switched prod3 categories
* (observations whose prod3 differs from the plant's mean prod3)
drop if meanprod3 != prod3

* One row per plant with yearly columns va_pwk<year> and exporter<year>.
* The stub is written out in full as va_pwk because the statements below
* refer to va_pwk2001; this avoids relying on variable abbreviation.
reshape wide va_pwk exporter, i(PSID) j(year)

* 2-digit industry code: prod3 with its last digit dropped
gen prod2 = floor(prod3/10)

*Using 2001 productivity rank
* Terciles of 2001 value added per worker within each 2-digit industry
egen BinPdty3_initialprod2 = xtile(va_pwk2001), by(prod2) nq(3)
label var BinPdty3_initialprod2 "prod2 xtile(3) based on va_pwk2001, unbalanced"

keep PSID va_pwk2001 BinPdty3_initialprod* prod2 exporter2001
sort PSID

save "${interpdty}pdtybin_exit0112.dta", replace



***********************************************************************
***********************************************************************
*Assemble data for exit analysis
clear
use "${mastersi}si_panel_exit0112.dta"
sort PSID

*Merge with the plant-level bins based on 2001 value added per worker.
* Only plants present in both files are kept.
merge m:1 PSID using "${interpdty}pdtybin_exit0112.dta", keep(match) nogenerate
* prod2 from the bin file is dropped; it is rebuilt from prod3 below
drop prod2

*Merge with the plant-level bins based on the wrdg productivity estimates.
* keep(master match) prevents plants that exist only in the wrdg file (for
* example plants removed by the previous merge) from being appended as rows
* with missing year and missing plant characteristics.
merge m:1 PSID using "${interpdty}pdtybin_wrdg_exit0112.dta", keep(master match) nogenerate
drop prod2

* Log transformations. ln() of a zero or negative argument is missing, so
* e.g. plant-years with non-positive totProfits have missing lprofit.
gen lvladded = ln(value_added)
gen lflabd = ln(num_workers)

* Non-production workers = all workers minus production workers
gen labor_nonprod = num_workers-laborCnt_prod
gen lmp_ratio = ln(labor_nonprod/laborCnt_prod)

gen lCaptoLabor = ln(cap_est/num_workers)
gen llabor_nonprod = ln(labor_nonprod)
gen lLabor = ln(num_workers)
gen lCap = ln(cap_est)
gen llabor_prod = ln(laborCnt_prod)
gen lprofit = ln(totProfits)
gen lCaptoProd = ln(cap_est/laborCnt_prod)
gen lsales = ln(income_goodsProd)
gen lwages = ln(wages)
gen lexp_rawMat = ln(expenses_rawMat)
gen lexp_rawMatimported = ln(rawImports)
* NOTE(review): an earlier section of this file refers to expenses_elect;
* expenses_elec presumably resolves to the same variable via abbreviation.
* Confirm against the dataset.
gen lexp_eltrc = ln(expenses_elec)
gen lrawimp_ratio = ln(rawImports/expenses_rawMat)


*Generate product*year fixed effects
gen prod2 = floor(prod3/10)
sort prod2
* Show which prod3 codes fall inside each 2-digit industry
by prod2: tab prod3
egen yearXprod2= group(year prod2)
sort year prod2
order year prod2 yearXprod2

*Generate province*year fixed effects
* NOTE(review): kabu must be an exact variable name or an unambiguous
* abbreviation; kabucode and kabucode_1990 both appear earlier in this
* workflow, so confirm which variable is intended.
gen prv = floor(kabu/100)
sort prv
* Show which kabu codes fall inside each province
by prv: tab kabu
egen yearXprv= group(year prv)
sort year prv
order year prv yearXprv

* Island group from the province code
gen island = .
replace island = 1 if prv<30
replace island = 2 if prv>=30 & prv<60
replace island = 3 if prv>=60 & prv<70
* Missing values compare greater than any number, so missing prv is excluded
* explicitly to keep island missing when the province code is missing.
replace island = 4 if prv>=70 & prv < .

egen yearXis= group(year island)

egen yearXkabu = group(year kabu)

save "${data}analysis_exit0112.dta", replace





































